# Project root, defined once so the working directory and the data path
# below cannot drift apart.
# NOTE(review): this is a machine-specific absolute path; it must be adjusted
# (or replaced by a project-relative path) before running elsewhere.
project_dir <- "C:/Users/cheik/Desktop/Data Viz and Stats Inference II/Data Viz"
setwd(project_dir)
getwd()
## [1] "C:/Users/cheik/Desktop/Data Viz and Stats Inference II/Data Viz"

# Read the university rankings CSV from the project's Data/ folder and
# preview the first rows.
college <- read.csv(
  file.path(project_dir, "Data", "US_News_National_University_Rankings.csv")
)
head(college)
## Rank University City State Type Tuition.in.state
## 1 1 Princeton University Princeton NJ Private 41820
## 2 2 Harvard University Cambridge MA Private 43938
## 3 3 Yale University New Haven CT Private 45800
## 4 4 Columbia University New York NY Private 51008
## 5 4 Stanford University Stanford CA Private 44757
## 6 4 University of Chicago Chicago IL Private 48253
## Tuiition.out.of.state Enrollment Acceptance.rate..2013.
## 1 41820 8014 7.4
## 2 43938 19882 5.8
## 3 45800 12109 6.9
## 4 51008 23606 6.9
## 5 44757 18136 5.7
## 6 48253 12539 8.8
## Freshman.retention.rate X6yr.Grad.rate Region
## 1 98 97 NE
## 2 97 97 NE
## 3 99 98 NE
## 4 99 96 NE
## 5 98 96 W
## 6 99 93 MW
# Number of rows (universities) in the rankings table.
dim(college)[1L]
## [1] 280
library(tidyverse)
## -- Attaching packages ------------------------------------------ tidyverse 1.2.1 --
## v ggplot2 3.1.0 v purrr 0.2.5
## v tibble 2.0.1 v dplyr 0.7.8
## v tidyr 0.8.2 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.3.0
## -- Conflicts --------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Count universities for every (State, Type) combination.
# summarise() only peels off the last grouping variable, so `groupbytype`
# is still grouped by State afterwards.
groupbytype <- college %>%
  group_by(State, Type) %>%
  summarise(n = n())

# Per-state counts of public universities as a plain data frame.
# After the filter, Type is constant, so the column is dropped explicitly.
# (The previous `names("Public")` evaluated to NULL and removed the column
# only by accident of how `$<-` treats NULL.)
public <- groupbytype %>% filter(Type == "Public")
public <- as.data.frame(public)
public$Type <- NULL

# Same for private universities.
private <- groupbytype %>% filter(Type == "Private")
private <- as.data.frame(private)
private$Type <- NULL

private
## State n
## 1 AZ 2
## 2 CA 14
## 3 CO 2
## 4 CT 1
## 5 DC 5
## 6 DE 1
## 7 FL 5
## 8 GA 2
## 9 IL 8
## 10 IN 1
## 11 KY 1
## 12 LA 1
## 13 MA 9
## 14 MD 1
## 15 MI 1
## 16 MN 4
## 17 MO 3
## 18 NC 2
## 19 NH 1
## 20 NJ 3
## 21 NY 15
## 22 OH 4
## 23 OK 1
## 24 PA 7
## 25 RI 1
## 26 TN 2
## 27 TX 5
## 28 UT 1
## 29 VA 1
## 30 WI 3
# Full outer join of the public and private counts on State, giving one row
# per state. A state that appears on only one side gets NA in the other
# count column. `all = TRUE` is shorthand for all.x = TRUE plus all.y = TRUE;
# the literal TRUE is used because `T` is an ordinary, reassignable variable.
bystate <- merge(public, private, by = "State", all = TRUE)
# The merged count columns arrive as n.x / n.y; give them meaningful names.
colnames(bystate) <- c("State", "Public", "Private")
#bystate
# Bar chart of the number of public universities per state.
# Every bar carries the fixed label "Public"; bars are light blue with a
# dark blue outline.
p <- add_trace(
  plot_ly(data = public),
  x = ~State,
  y = ~n,
  type = "bar",
  text = "Public",
  textposition = "auto",
  marker = list(
    color = "rgb(158,202,225)",
    line = list(color = "rgb(8,48,107)", width = 1.5)
  )
)
# Auto-print to render the widget.
p
# Grouped bar chart: number of public and private universities per state.
#
# The outer merge that builds `bystate` leaves NA wherever a state has no
# university of one type, and plotly discards those points with
# "Ignoring N observations" warnings. A missing count means zero, so fill
# it in on a plotting copy; `bystate` itself is left untouched.
plot_counts <- bystate
plot_counts$Public[is.na(plot_counts$Public)] <- 0L
plot_counts$Private[is.na(plot_counts$Private)] <- 0L

p <- plot_ly(
  data = plot_counts, x = ~State, y = ~Public, type = 'bar',
  text = "Public",
  name = "Public",
  marker = list(color = 'rgb(158,202,225)',
                line = list(color = 'rgb(8,48,107)', width = 1.5))
) %>%
  # Second series: private counts, drawn next to the public bars.
  add_trace(
    x = ~State, y = ~Private, type = 'bar',
    text = "Private",
    name = "Private",
    marker = list(color = 'rgb(58,200,225)',
                  line = list(color = 'rgb(8,48,107)', width = 1.5))
  ) %>%
  layout(
    title = "Number of Colleges (Public & Private) By State",
    barmode = 'group',
    xaxis = list(title = "State"),
    yaxis = list(title = "Count")
  ) %>%
  # Strip the mode bar down: no logo, no zoom/pan/select/hover-mode buttons.
  # NOTE(review): `collaborate` may be deprecated in newer plotly releases;
  # confirm against the installed version.
  config(
    collaborate = FALSE,
    displaylogo = FALSE,
    modeBarButtonsToRemove = list(
      "resetScale2d", "sendDataToCloud", "zoom2d", "zoomIn2d", "zoomOut2d",
      "pan2d", "select2d", "lasso2d", "hoverClosestCartesian",
      "hoverCompareCartesian", "hoverClosestGl2d", "hoverClosestPie",
      "toggleHover", "resetViews", "toggleSpikelines"
    )
  )
p